library(readr)
##
## Attaching package: 'readr'
## The following objects are masked from 'package:scales':
##
## col_factor, col_numeric
# Parse the sample Apache access log straight from GitHub. read_log() infers
# the column types and reports malformed lines as parsing problems rather
# than stopping.
rawlogs <- read_log(
  file = "https://raw.githubusercontent.com/elastic/examples/master/ElasticStack_apache/apache_logs"
)
## Parsed with column specification:
## cols(
## X1 = col_character(),
## X2 = col_character(),
## X3 = col_character(),
## X4 = col_character(),
## X5 = col_character(),
## X6 = col_integer(),
## X7 = col_integer(),
## X8 = col_character(),
## X9 = col_character()
## )
## Warning: 16 parsing failures.
## row col expected actual
## 4031 -- 9 columns 5 columns
## 4192 -- 9 columns 5 columns
## 8897 X6 an integer U;
## 8897 X7 an integer )
## 8897 -- 9 columns 11 columns
## .... ... .......... ..........
## See problems(...) for more details.
library(data.table)
# Hold the parsed log in a data.table; the later steps add columns to it with
# `:=`.
logs <- data.table(rawlogs)
# Show the first six rows to check the parse.
knitr::kable(head(logs, n = 6L))
| X1 | X2 | X3 | X4 | X5 | X6 | X7 | X8 | X9 |
|---|---|---|---|---|---|---|---|---|
| 83.149.9.216 | NA | NA | 17/May/2015:10:05:03 +0000 | GET /presentations/logstash-monitorama-2013/images/kibana-search.png HTTP/1.1 | 200 | 203023 | http://semicomplete.com/presentations/logstash-monitorama-2013/ | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36 |
| 83.149.9.216 | NA | NA | 17/May/2015:10:05:43 +0000 | GET /presentations/logstash-monitorama-2013/images/kibana-dashboard3.png HTTP/1.1 | 200 | 171717 | http://semicomplete.com/presentations/logstash-monitorama-2013/ | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36 |
| 83.149.9.216 | NA | NA | 17/May/2015:10:05:47 +0000 | GET /presentations/logstash-monitorama-2013/plugin/highlight/highlight.js HTTP/1.1 | 200 | 26185 | http://semicomplete.com/presentations/logstash-monitorama-2013/ | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36 |
| 83.149.9.216 | NA | NA | 17/May/2015:10:05:12 +0000 | GET /presentations/logstash-monitorama-2013/plugin/zoom-js/zoom.js HTTP/1.1 | 200 | 7697 | http://semicomplete.com/presentations/logstash-monitorama-2013/ | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36 |
| 83.149.9.216 | NA | NA | 17/May/2015:10:05:07 +0000 | GET /presentations/logstash-monitorama-2013/plugin/notes/notes.js HTTP/1.1 | 200 | 2892 | http://semicomplete.com/presentations/logstash-monitorama-2013/ | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36 |
| 83.149.9.216 | NA | NA | 17/May/2015:10:05:34 +0000 | GET /presentations/logstash-monitorama-2013/images/sad-medic.png HTTP/1.1 | 200 | 430406 | http://semicomplete.com/presentations/logstash-monitorama-2013/ | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36 |
Logs usually don’t have headers, so you need to replace the default column names with something more descriptive.
# Replace the default X1..X9 names with names based on the Apache log format
# directives, see
# http://stackoverflow.com/questions/9234699/understanding-apache-access-log
#   ip       - %h, the remote host (i.e. the client IP)
#   identd   - %l, the identity of the user determined by identd
#              (not usually used since not reliable)
#   uname    - %u, the user name determined by HTTP authentication
#   time     - %t, the time the request was received
#   request  - %r, the request line from the client ("GET / HTTP/1.0")
#   status   - %>s, the status code sent from the server to the client
#              (200, 404 etc.)
#   respsize - %b, the size of the response to the client (in bytes)
#   referer  - the page that linked to this URL
#   agent    - the browser identification string (User-agent)
# With only a vector of new names, setnames() renames every column in order.
setnames(
  logs,
  c("ip", "identd", "uname", "time", "request",
    "status", "respsize", "referer", "agent")
)
knitr::kable(head(logs, n = 6L))
| ip | identd | uname | time | request | status | respsize | referer | agent |
|---|---|---|---|---|---|---|---|---|
| 83.149.9.216 | NA | NA | 17/May/2015:10:05:03 +0000 | GET /presentations/logstash-monitorama-2013/images/kibana-search.png HTTP/1.1 | 200 | 203023 | http://semicomplete.com/presentations/logstash-monitorama-2013/ | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36 |
| 83.149.9.216 | NA | NA | 17/May/2015:10:05:43 +0000 | GET /presentations/logstash-monitorama-2013/images/kibana-dashboard3.png HTTP/1.1 | 200 | 171717 | http://semicomplete.com/presentations/logstash-monitorama-2013/ | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36 |
| 83.149.9.216 | NA | NA | 17/May/2015:10:05:47 +0000 | GET /presentations/logstash-monitorama-2013/plugin/highlight/highlight.js HTTP/1.1 | 200 | 26185 | http://semicomplete.com/presentations/logstash-monitorama-2013/ | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36 |
| 83.149.9.216 | NA | NA | 17/May/2015:10:05:12 +0000 | GET /presentations/logstash-monitorama-2013/plugin/zoom-js/zoom.js HTTP/1.1 | 200 | 7697 | http://semicomplete.com/presentations/logstash-monitorama-2013/ | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36 |
| 83.149.9.216 | NA | NA | 17/May/2015:10:05:07 +0000 | GET /presentations/logstash-monitorama-2013/plugin/notes/notes.js HTTP/1.1 | 200 | 2892 | http://semicomplete.com/presentations/logstash-monitorama-2013/ | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36 |
| 83.149.9.216 | NA | NA | 17/May/2015:10:05:34 +0000 | GET /presentations/logstash-monitorama-2013/images/sad-medic.png HTTP/1.1 | 200 | 430406 | http://semicomplete.com/presentations/logstash-monitorama-2013/ | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36 |
R has date-handling capability out of the box; however, the lubridate package makes it easier to convert strings to dates and to manipulate them.
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:data.table':
##
## hour, mday, month, quarter, wday, week, yday, year
## The following object is masked from 'package:base':
##
## date
# Convert the day/month/year hour:minute:second strings in `time` to
# date-times, overwriting the column by reference.
logs[, time := dmy_hms(time)]
## ip identd uname time
## 1: 83.149.9.216 NA NA 2015-05-17 10:05:03
## 2: 83.149.9.216 NA NA 2015-05-17 10:05:43
## 3: 83.149.9.216 NA NA 2015-05-17 10:05:47
## 4: 83.149.9.216 NA NA 2015-05-17 10:05:12
## 5: 83.149.9.216 NA NA 2015-05-17 10:05:07
## ---
## 9992: 100.43.83.137 NA NA 2015-05-20 21:05:01
## 9993: 63.140.98.80 NA NA 2015-05-20 21:05:28
## 9994: 66.249.73.135 NA NA 2015-05-20 21:05:00
## 9995: 180.76.6.56 NA NA 2015-05-20 21:05:56
## 9996: 46.105.14.53 NA NA 2015-05-20 21:05:15
## request
## 1: GET /presentations/logstash-monitorama-2013/images/kibana-search.png HTTP/1.1
## 2: GET /presentations/logstash-monitorama-2013/images/kibana-dashboard3.png HTTP/1.1
## 3: GET /presentations/logstash-monitorama-2013/plugin/highlight/highlight.js HTTP/1.1
## 4: GET /presentations/logstash-monitorama-2013/plugin/zoom-js/zoom.js HTTP/1.1
## 5: GET /presentations/logstash-monitorama-2013/plugin/notes/notes.js HTTP/1.1
## ---
## 9992: GET /blog/tags/standards HTTP/1.1
## 9993: \n63.140.98.80 - - [20/May/2015:21:05:50 +0000])
## 9994: GET /?flav=atom HTTP/1.1
## 9995: GET /robots.txt HTTP/1.1
## 9996: GET /blog/tags/puppet?flav=rss20 HTTP/1.1
## status respsize
## 1: 200 203023
## 2: 200 171717
## 3: 200 26185
## 4: 200 7697
## 5: 200 2892
## ---
## 9992: 200 13358
## 9993: NA NA
## 9994: 200 32352
## 9995: 200 NA
## 9996: 200 14872
## referer
## 1: http://semicomplete.com/presentations/logstash-monitorama-2013/
## 2: http://semicomplete.com/presentations/logstash-monitorama-2013/
## 3: http://semicomplete.com/presentations/logstash-monitorama-2013/
## 4: http://semicomplete.com/presentations/logstash-monitorama-2013/
## 5: http://semicomplete.com/presentations/logstash-monitorama-2013/
## ---
## 9992: NA
## 9993: NA
## 9994: NA
## 9995: NA
## 9996: NA
## agent
## 1: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
## 2: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
## 3: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
## 4: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
## 5: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
## ---
## 9992: Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)
## 9993: NA
## 9994: Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)
## 9995: Mozilla/5.0 (Windows NT 5.1; rv:6.0.2) Gecko/20100101 Firefox/6.0.2
## 9996: UniversalFeedParser/4.2-pre-314-svn +http://feedparser.org/
# Derive calendar features from the parsed timestamp:
#   hour    - hour of the day
#   wday    - day of the week as a number (the first rows, dated 2015-05-17,
#             a Sunday, come out as 1)
#   morning - TRUE when the timestamp falls before noon
logs[, c("hour", "wday", "morning") := .(hour(time), wday(time), am(time))]
## ip identd uname time
## 1: 83.149.9.216 NA NA 2015-05-17 10:05:03
## 2: 83.149.9.216 NA NA 2015-05-17 10:05:43
## 3: 83.149.9.216 NA NA 2015-05-17 10:05:47
## 4: 83.149.9.216 NA NA 2015-05-17 10:05:12
## 5: 83.149.9.216 NA NA 2015-05-17 10:05:07
## ---
## 9992: 100.43.83.137 NA NA 2015-05-20 21:05:01
## 9993: 63.140.98.80 NA NA 2015-05-20 21:05:28
## 9994: 66.249.73.135 NA NA 2015-05-20 21:05:00
## 9995: 180.76.6.56 NA NA 2015-05-20 21:05:56
## 9996: 46.105.14.53 NA NA 2015-05-20 21:05:15
## request
## 1: GET /presentations/logstash-monitorama-2013/images/kibana-search.png HTTP/1.1
## 2: GET /presentations/logstash-monitorama-2013/images/kibana-dashboard3.png HTTP/1.1
## 3: GET /presentations/logstash-monitorama-2013/plugin/highlight/highlight.js HTTP/1.1
## 4: GET /presentations/logstash-monitorama-2013/plugin/zoom-js/zoom.js HTTP/1.1
## 5: GET /presentations/logstash-monitorama-2013/plugin/notes/notes.js HTTP/1.1
## ---
## 9992: GET /blog/tags/standards HTTP/1.1
## 9993: \n63.140.98.80 - - [20/May/2015:21:05:50 +0000])
## 9994: GET /?flav=atom HTTP/1.1
## 9995: GET /robots.txt HTTP/1.1
## 9996: GET /blog/tags/puppet?flav=rss20 HTTP/1.1
## status respsize
## 1: 200 203023
## 2: 200 171717
## 3: 200 26185
## 4: 200 7697
## 5: 200 2892
## ---
## 9992: 200 13358
## 9993: NA NA
## 9994: 200 32352
## 9995: 200 NA
## 9996: 200 14872
## referer
## 1: http://semicomplete.com/presentations/logstash-monitorama-2013/
## 2: http://semicomplete.com/presentations/logstash-monitorama-2013/
## 3: http://semicomplete.com/presentations/logstash-monitorama-2013/
## 4: http://semicomplete.com/presentations/logstash-monitorama-2013/
## 5: http://semicomplete.com/presentations/logstash-monitorama-2013/
## ---
## 9992: NA
## 9993: NA
## 9994: NA
## 9995: NA
## 9996: NA
## agent
## 1: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
## 2: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
## 3: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
## 4: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
## 5: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
## ---
## 9992: Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)
## 9993: NA
## 9994: Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)
## 9995: Mozilla/5.0 (Windows NT 5.1; rv:6.0.2) Gecko/20100101 Firefox/6.0.2
## 9996: UniversalFeedParser/4.2-pre-314-svn +http://feedparser.org/
## hour wday morning
## 1: 10 1 TRUE
## 2: 10 1 TRUE
## 3: 10 1 TRUE
## 4: 10 1 TRUE
## 5: 10 1 TRUE
## ---
## 9992: 21 4 FALSE
## 9993: 21 4 FALSE
## 9994: 21 4 FALSE
## 9995: 21 4 FALSE
## 9996: 21 4 FALSE
# Flag weekend requests. This relies on `wday` numbering Sunday as 1 and
# Saturday as 7, as produced by the wday() call that created the column.
logs[, weekend := wday %in% c(1, 7)]
## ip identd uname time
## 1: 83.149.9.216 NA NA 2015-05-17 10:05:03
## 2: 83.149.9.216 NA NA 2015-05-17 10:05:43
## 3: 83.149.9.216 NA NA 2015-05-17 10:05:47
## 4: 83.149.9.216 NA NA 2015-05-17 10:05:12
## 5: 83.149.9.216 NA NA 2015-05-17 10:05:07
## ---
## 9992: 100.43.83.137 NA NA 2015-05-20 21:05:01
## 9993: 63.140.98.80 NA NA 2015-05-20 21:05:28
## 9994: 66.249.73.135 NA NA 2015-05-20 21:05:00
## 9995: 180.76.6.56 NA NA 2015-05-20 21:05:56
## 9996: 46.105.14.53 NA NA 2015-05-20 21:05:15
## request
## 1: GET /presentations/logstash-monitorama-2013/images/kibana-search.png HTTP/1.1
## 2: GET /presentations/logstash-monitorama-2013/images/kibana-dashboard3.png HTTP/1.1
## 3: GET /presentations/logstash-monitorama-2013/plugin/highlight/highlight.js HTTP/1.1
## 4: GET /presentations/logstash-monitorama-2013/plugin/zoom-js/zoom.js HTTP/1.1
## 5: GET /presentations/logstash-monitorama-2013/plugin/notes/notes.js HTTP/1.1
## ---
## 9992: GET /blog/tags/standards HTTP/1.1
## 9993: \n63.140.98.80 - - [20/May/2015:21:05:50 +0000])
## 9994: GET /?flav=atom HTTP/1.1
## 9995: GET /robots.txt HTTP/1.1
## 9996: GET /blog/tags/puppet?flav=rss20 HTTP/1.1
## status respsize
## 1: 200 203023
## 2: 200 171717
## 3: 200 26185
## 4: 200 7697
## 5: 200 2892
## ---
## 9992: 200 13358
## 9993: NA NA
## 9994: 200 32352
## 9995: 200 NA
## 9996: 200 14872
## referer
## 1: http://semicomplete.com/presentations/logstash-monitorama-2013/
## 2: http://semicomplete.com/presentations/logstash-monitorama-2013/
## 3: http://semicomplete.com/presentations/logstash-monitorama-2013/
## 4: http://semicomplete.com/presentations/logstash-monitorama-2013/
## 5: http://semicomplete.com/presentations/logstash-monitorama-2013/
## ---
## 9992: NA
## 9993: NA
## 9994: NA
## 9995: NA
## 9996: NA
## agent
## 1: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
## 2: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
## 3: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
## 4: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
## 5: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
## ---
## 9992: Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)
## 9993: NA
## 9994: Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)
## 9995: Mozilla/5.0 (Windows NT 5.1; rv:6.0.2) Gecko/20100101 Firefox/6.0.2
## 9996: UniversalFeedParser/4.2-pre-314-svn +http://feedparser.org/
## hour wday morning weekend
## 1: 10 1 TRUE TRUE
## 2: 10 1 TRUE TRUE
## 3: 10 1 TRUE TRUE
## 4: 10 1 TRUE TRUE
## 5: 10 1 TRUE TRUE
## ---
## 9992: 21 4 FALSE FALSE
## 9993: 21 4 FALSE FALSE
## 9994: 21 4 FALSE FALSE
## 9995: 21 4 FALSE FALSE
## 9996: 21 4 FALSE FALSE
There are a few packages for resolving IPs:

- rgeolocate
- ggmap
- iptools
- ipapi (gh: hrbrmstr/ipapi)

Which one to use depends on API preferences, plus any additional requirements.
Play it smart: don’t call the lookup service for every record; call it once for every unique IP address. Cache values where possible!
# Install ipapi from GitHub only when it is not already installed.
# requireNamespace() tests for availability without attaching the package and
# without the "there is no package called" warning that require() emits; the
# library(ipapi) call that follows does the attaching and fails loudly if the
# installation did not succeed.
if (!requireNamespace("ipapi", quietly = TRUE)) {
  devtools::install_github("hrbrmstr/ipapi")
}
## Loading required package: ipapi
## Warning in library(package, lib.loc = lib.loc, character.only = TRUE,
## logical.return = TRUE, : there is no package called 'ipapi'
## Using GitHub PAT from envvar GITHUB_PAT
## Downloading GitHub repo hrbrmstr/ipapi@master
## from URL https://api.github.com/repos/hrbrmstr/ipapi/zipball/master
## Installing ipapi
## Installing pbapply
## '/home/travis/R-bin/lib/R/bin/R' --no-site-file --no-environ --no-save \
## --no-restore --quiet CMD INSTALL \
## '/tmp/RtmpxvtyVV/devtoolsfabb16a8ab57/pbapply' \
## --library='/home/travis/R/Library' --install-tests
##
## '/home/travis/R-bin/lib/R/bin/R' --no-site-file --no-environ --no-save \
## --no-restore --quiet CMD INSTALL \
## '/tmp/RtmpxvtyVV/devtoolsfabb48bd8ac4/hrbrmstr-ipapi-c612329' \
## --library='/home/travis/R/Library' --install-tests
##
library(ipapi)
# Geolocation is needed once per distinct client IP, not once per log row.
ips <- logs[, unique(ip)]

# TRUE: read a pre-computed lookup table from a CSV (no calls to the
# geolocation service). FALSE: resolve `ips` live through ipapi.
example <- TRUE
iptblloc <- "https://raw.githubusercontent.com/stephlocke/lazyCDN/master/sampleIPtbl.csv"

ip_tbl <- if (example) {
  # Read `zip` as character from the start: postal codes are not numbers
  # (the sample contains 'EC4N'), and letting fread guess a numeric type first
  # would drop leading zeros before the column is bumped to character.
  fread(iptblloc, colClasses = c(zip = "character"))
} else {
  # Same call as the commented-out alternative kept further down in this
  # script. Without this branch `ip_tbl` would be NULL when `example` is FALSE.
  # NOTE(review): assumes geolocate() returns a data.table with a `status`
  # column - confirm against the installed ipapi version.
  ipapi::geolocate(ips)[, status := NULL]
}
## Warning in fread(iptblloc): Bumped column 13 to type character on data
## row 29, field contains 'EC4N'. Coercing previously read values in this
## column from logical, integer or numeric back to character which may not
## be lossless; e.g., if '00' and '000' occurred before they will now be just
## '0', and there may be inconsistencies with treatment of ',,' and ',NA,' too
## (if they occurred in this column before the bump). If this matters please
## rerun and set 'colClasses' to 'character' for this column. Please note
## that column type detection uses the first 5 rows, the middle 5 rows and the
## last 5 rows, so hopefully this message should be very rare. If reporting to
## datatable-help, please rerun and include the output from verbose=TRUE.
#ip_tbl<- ipapi::geolocate(ips)[, status:=NULL]
# Join IP results to log data
# logs[ip_tbl, on = ...] matches logs$ip against ip_tbl$query and appends the
# geolocation columns after the log columns.
# NOTE(review): in this X[Y] form the result follows the rows of ip_tbl, so
# the log rows come back grouped by IP rather than in their original order -
# confirm nothing downstream relies on the original ordering.
logs <- logs[ip_tbl, on = c(ip = "query")]
head(logs, n = 6L)
## ip identd uname time
## 1: 83.149.9.216 NA NA 2015-05-17 10:05:03
## 2: 83.149.9.216 NA NA 2015-05-17 10:05:43
## 3: 83.149.9.216 NA NA 2015-05-17 10:05:47
## 4: 83.149.9.216 NA NA 2015-05-17 10:05:12
## 5: 83.149.9.216 NA NA 2015-05-17 10:05:07
## 6: 83.149.9.216 NA NA 2015-05-17 10:05:34
## request
## 1: GET /presentations/logstash-monitorama-2013/images/kibana-search.png HTTP/1.1
## 2: GET /presentations/logstash-monitorama-2013/images/kibana-dashboard3.png HTTP/1.1
## 3: GET /presentations/logstash-monitorama-2013/plugin/highlight/highlight.js HTTP/1.1
## 4: GET /presentations/logstash-monitorama-2013/plugin/zoom-js/zoom.js HTTP/1.1
## 5: GET /presentations/logstash-monitorama-2013/plugin/notes/notes.js HTTP/1.1
## 6: GET /presentations/logstash-monitorama-2013/images/sad-medic.png HTTP/1.1
## status respsize
## 1: 200 203023
## 2: 200 171717
## 3: 200 26185
## 4: 200 7697
## 5: 200 2892
## 6: 200 430406
## referer
## 1: http://semicomplete.com/presentations/logstash-monitorama-2013/
## 2: http://semicomplete.com/presentations/logstash-monitorama-2013/
## 3: http://semicomplete.com/presentations/logstash-monitorama-2013/
## 4: http://semicomplete.com/presentations/logstash-monitorama-2013/
## 5: http://semicomplete.com/presentations/logstash-monitorama-2013/
## 6: http://semicomplete.com/presentations/logstash-monitorama-2013/
## agent
## 1: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
## 2: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
## 3: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
## 4: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
## 5: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
## 6: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
## hour wday morning weekend as city country
## 1: 10 1 TRUE TRUE AS25159 PJSC MegaFon Moscow Russia
## 2: 10 1 TRUE TRUE AS25159 PJSC MegaFon Moscow Russia
## 3: 10 1 TRUE TRUE AS25159 PJSC MegaFon Moscow Russia
## 4: 10 1 TRUE TRUE AS25159 PJSC MegaFon Moscow Russia
## 5: 10 1 TRUE TRUE AS25159 PJSC MegaFon Moscow Russia
## 6: 10 1 TRUE TRUE AS25159 PJSC MegaFon Moscow Russia
## countryCode isp lat lon org region regionName
## 1: RU PJSC MegaFon 55.7522 37.6156 PJSC MegaFon MOW Moscow
## 2: RU PJSC MegaFon 55.7522 37.6156 PJSC MegaFon MOW Moscow
## 3: RU PJSC MegaFon 55.7522 37.6156 PJSC MegaFon MOW Moscow
## 4: RU PJSC MegaFon 55.7522 37.6156 PJSC MegaFon MOW Moscow
## 5: RU PJSC MegaFon 55.7522 37.6156 PJSC MegaFon MOW Moscow
## 6: RU PJSC MegaFon 55.7522 37.6156 PJSC MegaFon MOW Moscow
## timezone zip
## 1: Europe/Moscow 101194
## 2: Europe/Moscow 101194
## 3: Europe/Moscow 101194
## 4: Europe/Moscow 101194
## 5: Europe/Moscow 101194
## 6: Europe/Moscow 101194
The format of the Apache request log means that the request component needs splitting up. The values are not always in quite the right format, so you should always check for errors.
# Split the request line on single spaces and keep the first three pieces as
# the HTTP verb, the requested URL and the protocol ("scheme") columns.
logs[, c("verb", "url", "scheme") := tstrsplit(request, " ")[1:3]]
## ip identd uname time
## 1: 83.149.9.216 NA NA 2015-05-17 10:05:03
## 2: 83.149.9.216 NA NA 2015-05-17 10:05:43
## 3: 83.149.9.216 NA NA 2015-05-17 10:05:47
## 4: 83.149.9.216 NA NA 2015-05-17 10:05:12
## 5: 83.149.9.216 NA NA 2015-05-17 10:05:07
## ---
## 9992: 38.99.236.50 NA NA 2015-05-20 21:05:48
## 9993: 38.99.236.50 NA NA 2015-05-20 21:05:42
## 9994: 38.99.236.50 NA NA 2015-05-20 21:05:29
## 9995: 38.99.236.50 NA NA 2015-05-20 21:05:31
## 9996: 180.76.6.56 NA NA 2015-05-20 21:05:56
## request
## 1: GET /presentations/logstash-monitorama-2013/images/kibana-search.png HTTP/1.1
## 2: GET /presentations/logstash-monitorama-2013/images/kibana-dashboard3.png HTTP/1.1
## 3: GET /presentations/logstash-monitorama-2013/plugin/highlight/highlight.js HTTP/1.1
## 4: GET /presentations/logstash-monitorama-2013/plugin/zoom-js/zoom.js HTTP/1.1
## 5: GET /presentations/logstash-monitorama-2013/plugin/notes/notes.js HTTP/1.1
## ---
## 9992: GET /presentations/logstash-puppetconf-2012/images/stats-negative-min.png HTTP/1.1
## 9993: GET /presentations/logstash-puppetconf-2012/images/logs.jpg HTTP/1.1
## 9994: GET /presentations/logstash-puppetconf-2012/images/apache-negative-duration.png HTTP/1.1
## 9995: GET /favicon.ico HTTP/1.1
## 9996: GET /robots.txt HTTP/1.1
## status respsize
## 1: 200 203023
## 2: 200 171717
## 3: 200 26185
## 4: 200 7697
## 5: 200 2892
## ---
## 9992: 200 46139
## 9993: 200 663847
## 9994: 200 97173
## 9995: 200 3638
## 9996: 200 NA
## referer
## 1: http://semicomplete.com/presentations/logstash-monitorama-2013/
## 2: http://semicomplete.com/presentations/logstash-monitorama-2013/
## 3: http://semicomplete.com/presentations/logstash-monitorama-2013/
## 4: http://semicomplete.com/presentations/logstash-monitorama-2013/
## 5: http://semicomplete.com/presentations/logstash-monitorama-2013/
## ---
## 9992: http://semicomplete.com/presentations/logstash-puppetconf-2012/
## 9993: http://semicomplete.com/presentations/logstash-puppetconf-2012/
## 9994: http://semicomplete.com/presentations/logstash-puppetconf-2012/
## 9995: NA
## 9996: NA
## agent
## 1: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
## 2: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
## 3: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
## 4: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
## 5: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
## ---
## 9992: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.107 Safari/537.36
## 9993: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.107 Safari/537.36
## 9994: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.107 Safari/537.36
## 9995: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.107 Safari/537.36
## 9996: Mozilla/5.0 (Windows NT 5.1; rv:6.0.2) Gecko/20100101 Firefox/6.0.2
## hour wday morning weekend
## 1: 10 1 TRUE TRUE
## 2: 10 1 TRUE TRUE
## 3: 10 1 TRUE TRUE
## 4: 10 1 TRUE TRUE
## 5: 10 1 TRUE TRUE
## ---
## 9992: 21 4 FALSE FALSE
## 9993: 21 4 FALSE FALSE
## 9994: 21 4 FALSE FALSE
## 9995: 21 4 FALSE FALSE
## 9996: 21 4 FALSE FALSE
## as
## 1: AS25159 PJSC MegaFon
## 2: AS25159 PJSC MegaFon
## 3: AS25159 PJSC MegaFon
## 4: AS25159 PJSC MegaFon
## 5: AS25159 PJSC MegaFon
## ---
## 9992: AS174 Cogent Communications
## 9993: AS174 Cogent Communications
## 9994: AS174 Cogent Communications
## 9995: AS174 Cogent Communications
## 9996: AS55967 Beijing Baidu Netcom Science and Technology Co., Ltd.
## city country countryCode
## 1: Moscow Russia RU
## 2: Moscow Russia RU
## 3: Moscow Russia RU
## 4: Moscow Russia RU
## 5: Moscow Russia RU
## ---
## 9992: Miami United States US
## 9993: Miami United States US
## 9994: Miami United States US
## 9995: Miami United States US
## 9996: Beijing China CN
## isp lat lon
## 1: PJSC MegaFon 55.7522 37.6156
## 2: PJSC MegaFon 55.7522 37.6156
## 3: PJSC MegaFon 55.7522 37.6156
## 4: PJSC MegaFon 55.7522 37.6156
## 5: PJSC MegaFon 55.7522 37.6156
## ---
## 9992: Cogent Communications 25.8530 -80.2348
## 9993: Cogent Communications 25.8530 -80.2348
## 9994: Cogent Communications 25.8530 -80.2348
## 9995: Cogent Communications 25.8530 -80.2348
## 9996: Beijing Baidu Netcom Science and Technology Co. 39.9289 116.3883
## org region regionName
## 1: PJSC MegaFon MOW Moscow
## 2: PJSC MegaFon MOW Moscow
## 3: PJSC MegaFon MOW Moscow
## 4: PJSC MegaFon MOW Moscow
## 5: PJSC MegaFon MOW Moscow
## ---
## 9992: Cogent Communications FL Florida
## 9993: Cogent Communications FL Florida
## 9994: Cogent Communications FL Florida
## 9995: Cogent Communications FL Florida
## 9996: Beijing Baidu Netcom Science and Technology Co. 11 Beijing
## timezone zip verb
## 1: Europe/Moscow 101194 GET
## 2: Europe/Moscow 101194 GET
## 3: Europe/Moscow 101194 GET
## 4: Europe/Moscow 101194 GET
## 5: Europe/Moscow 101194 GET
## ---
## 9992: America/New_York 33147 GET
## 9993: America/New_York 33147 GET
## 9994: America/New_York 33147 GET
## 9995: America/New_York 33147 GET
## 9996: Asia/Shanghai GET
## url
## 1: /presentations/logstash-monitorama-2013/images/kibana-search.png
## 2: /presentations/logstash-monitorama-2013/images/kibana-dashboard3.png
## 3: /presentations/logstash-monitorama-2013/plugin/highlight/highlight.js
## 4: /presentations/logstash-monitorama-2013/plugin/zoom-js/zoom.js
## 5: /presentations/logstash-monitorama-2013/plugin/notes/notes.js
## ---
## 9992: /presentations/logstash-puppetconf-2012/images/stats-negative-min.png
## 9993: /presentations/logstash-puppetconf-2012/images/logs.jpg
## 9994: /presentations/logstash-puppetconf-2012/images/apache-negative-duration.png
## 9995: /favicon.ico
## 9996: /robots.txt
## scheme
## 1: HTTP/1.1
## 2: HTTP/1.1
## 3: HTTP/1.1
## 4: HTTP/1.1
## 5: HTTP/1.1
## ---
## 9992: HTTP/1.1
## 9993: HTTP/1.1
## 9994: HTTP/1.1
## 9995: HTTP/1.1
## 9996: HTTP/1.1
# isolate issues!
# A request counts as well-formed when its verb is three or more capital
# letters and its scheme starts with "HTTP". Rows failing either test are
# moved to `errors` and removed from `logs`.
issues <- logs[, !(verb %like% "^[A-Z]{3,}$") | !(scheme %like% "^HTTP")]
errors <- logs[issues]
logs <- logs[!issues]
library(urltools)
# Break each requested URL into its path and its query-string parameters
# using urltools, stored as two new columns.
logs[, `:=`(path = path(url), params = parameters(url))]
## ip identd uname time
## 1: 83.149.9.216 NA NA 2015-05-17 10:05:03
## 2: 83.149.9.216 NA NA 2015-05-17 10:05:43
## 3: 83.149.9.216 NA NA 2015-05-17 10:05:47
## 4: 83.149.9.216 NA NA 2015-05-17 10:05:12
## 5: 83.149.9.216 NA NA 2015-05-17 10:05:07
## ---
## 9982: 38.99.236.50 NA NA 2015-05-20 21:05:48
## 9983: 38.99.236.50 NA NA 2015-05-20 21:05:42
## 9984: 38.99.236.50 NA NA 2015-05-20 21:05:29
## 9985: 38.99.236.50 NA NA 2015-05-20 21:05:31
## 9986: 180.76.6.56 NA NA 2015-05-20 21:05:56
## request
## 1: GET /presentations/logstash-monitorama-2013/images/kibana-search.png HTTP/1.1
## 2: GET /presentations/logstash-monitorama-2013/images/kibana-dashboard3.png HTTP/1.1
## 3: GET /presentations/logstash-monitorama-2013/plugin/highlight/highlight.js HTTP/1.1
## 4: GET /presentations/logstash-monitorama-2013/plugin/zoom-js/zoom.js HTTP/1.1
## 5: GET /presentations/logstash-monitorama-2013/plugin/notes/notes.js HTTP/1.1
## ---
## 9982: GET /presentations/logstash-puppetconf-2012/images/stats-negative-min.png HTTP/1.1
## 9983: GET /presentations/logstash-puppetconf-2012/images/logs.jpg HTTP/1.1
## 9984: GET /presentations/logstash-puppetconf-2012/images/apache-negative-duration.png HTTP/1.1
## 9985: GET /favicon.ico HTTP/1.1
## 9986: GET /robots.txt HTTP/1.1
## status respsize
## 1: 200 203023
## 2: 200 171717
## 3: 200 26185
## 4: 200 7697
## 5: 200 2892
## ---
## 9982: 200 46139
## 9983: 200 663847
## 9984: 200 97173
## 9985: 200 3638
## 9986: 200 NA
## referer
## 1: http://semicomplete.com/presentations/logstash-monitorama-2013/
## 2: http://semicomplete.com/presentations/logstash-monitorama-2013/
## 3: http://semicomplete.com/presentations/logstash-monitorama-2013/
## 4: http://semicomplete.com/presentations/logstash-monitorama-2013/
## 5: http://semicomplete.com/presentations/logstash-monitorama-2013/
## ---
## 9982: http://semicomplete.com/presentations/logstash-puppetconf-2012/
## 9983: http://semicomplete.com/presentations/logstash-puppetconf-2012/
## 9984: http://semicomplete.com/presentations/logstash-puppetconf-2012/
## 9985: NA
## 9986: NA
## agent
## 1: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
## 2: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
## 3: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
## 4: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
## 5: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
## ---
## 9982: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.107 Safari/537.36
## 9983: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.107 Safari/537.36
## 9984: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.107 Safari/537.36
## 9985: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.107 Safari/537.36
## 9986: Mozilla/5.0 (Windows NT 5.1; rv:6.0.2) Gecko/20100101 Firefox/6.0.2
## hour wday morning weekend
## 1: 10 1 TRUE TRUE
## 2: 10 1 TRUE TRUE
## 3: 10 1 TRUE TRUE
## 4: 10 1 TRUE TRUE
## 5: 10 1 TRUE TRUE
## ---
## 9982: 21 4 FALSE FALSE
## 9983: 21 4 FALSE FALSE
## 9984: 21 4 FALSE FALSE
## 9985: 21 4 FALSE FALSE
## 9986: 21 4 FALSE FALSE
## as
## 1: AS25159 PJSC MegaFon
## 2: AS25159 PJSC MegaFon
## 3: AS25159 PJSC MegaFon
## 4: AS25159 PJSC MegaFon
## 5: AS25159 PJSC MegaFon
## ---
## 9982: AS174 Cogent Communications
## 9983: AS174 Cogent Communications
## 9984: AS174 Cogent Communications
## 9985: AS174 Cogent Communications
## 9986: AS55967 Beijing Baidu Netcom Science and Technology Co., Ltd.
## city country countryCode
## 1: Moscow Russia RU
## 2: Moscow Russia RU
## 3: Moscow Russia RU
## 4: Moscow Russia RU
## 5: Moscow Russia RU
## ---
## 9982: Miami United States US
## 9983: Miami United States US
## 9984: Miami United States US
## 9985: Miami United States US
## 9986: Beijing China CN
## isp lat lon
## 1: PJSC MegaFon 55.7522 37.6156
## 2: PJSC MegaFon 55.7522 37.6156
## 3: PJSC MegaFon 55.7522 37.6156
## 4: PJSC MegaFon 55.7522 37.6156
## 5: PJSC MegaFon 55.7522 37.6156
## ---
## 9982: Cogent Communications 25.8530 -80.2348
## 9983: Cogent Communications 25.8530 -80.2348
## 9984: Cogent Communications 25.8530 -80.2348
## 9985: Cogent Communications 25.8530 -80.2348
## 9986: Beijing Baidu Netcom Science and Technology Co. 39.9289 116.3883
## org region regionName
## 1: PJSC MegaFon MOW Moscow
## 2: PJSC MegaFon MOW Moscow
## 3: PJSC MegaFon MOW Moscow
## 4: PJSC MegaFon MOW Moscow
## 5: PJSC MegaFon MOW Moscow
## ---
## 9982: Cogent Communications FL Florida
## 9983: Cogent Communications FL Florida
## 9984: Cogent Communications FL Florida
## 9985: Cogent Communications FL Florida
## 9986: Beijing Baidu Netcom Science and Technology Co. 11 Beijing
## timezone zip verb
## 1: Europe/Moscow 101194 GET
## 2: Europe/Moscow 101194 GET
## 3: Europe/Moscow 101194 GET
## 4: Europe/Moscow 101194 GET
## 5: Europe/Moscow 101194 GET
## ---
## 9982: America/New_York 33147 GET
## 9983: America/New_York 33147 GET
## 9984: America/New_York 33147 GET
## 9985: America/New_York 33147 GET
## 9986: Asia/Shanghai GET
## url
## 1: /presentations/logstash-monitorama-2013/images/kibana-search.png
## 2: /presentations/logstash-monitorama-2013/images/kibana-dashboard3.png
## 3: /presentations/logstash-monitorama-2013/plugin/highlight/highlight.js
## 4: /presentations/logstash-monitorama-2013/plugin/zoom-js/zoom.js
## 5: /presentations/logstash-monitorama-2013/plugin/notes/notes.js
## ---
## 9982: /presentations/logstash-puppetconf-2012/images/stats-negative-min.png
## 9983: /presentations/logstash-puppetconf-2012/images/logs.jpg
## 9984: /presentations/logstash-puppetconf-2012/images/apache-negative-duration.png
## 9985: /favicon.ico
## 9986: /robots.txt
## scheme
## 1: HTTP/1.1
## 2: HTTP/1.1
## 3: HTTP/1.1
## 4: HTTP/1.1
## 5: HTTP/1.1
## ---
## 9982: HTTP/1.1
## 9983: HTTP/1.1
## 9984: HTTP/1.1
## 9985: HTTP/1.1
## 9986: HTTP/1.1
## path
## 1: presentations/logstash-monitorama-2013/images/kibana-search.png
## 2: presentations/logstash-monitorama-2013/images/kibana-dashboard3.png
## 3: presentations/logstash-monitorama-2013/plugin/highlight/highlight.js
## 4: presentations/logstash-monitorama-2013/plugin/zoom-js/zoom.js
## 5: presentations/logstash-monitorama-2013/plugin/notes/notes.js
## ---
## 9982: presentations/logstash-puppetconf-2012/images/stats-negative-min.png
## 9983: presentations/logstash-puppetconf-2012/images/logs.jpg
## 9984: presentations/logstash-puppetconf-2012/images/apache-negative-duration.png
## 9985: favicon.ico
## 9986: robots.txt
## params
## 1: NA
## 2: NA
## 3: NA
## 4: NA
## 5: NA
## ---
## 9982: NA
## 9983: NA
## 9984: NA
## 9985: NA
## 9986: NA
Often you need to worry about steps taken over time. The data.table package gives you an easy way to add IDs to rows or groups.
# Number requests chronologically within each (ip, agent) pair and give each
# pair a visit id. `seq_len(.N)` yields the same 1..n numbering as `.SD[, .I]`
# without materialising `.SD` for every group.
logs[order(time), `:=`(order = seq_len(.N), visit = .GRP), by = .(ip, agent)]
## ip identd uname time
## 1: 83.149.9.216 NA NA 2015-05-17 10:05:03
## 2: 83.149.9.216 NA NA 2015-05-17 10:05:43
## 3: 83.149.9.216 NA NA 2015-05-17 10:05:47
## 4: 83.149.9.216 NA NA 2015-05-17 10:05:12
## 5: 83.149.9.216 NA NA 2015-05-17 10:05:07
## ---
## 9982: 38.99.236.50 NA NA 2015-05-20 21:05:48
## 9983: 38.99.236.50 NA NA 2015-05-20 21:05:42
## 9984: 38.99.236.50 NA NA 2015-05-20 21:05:29
## 9985: 38.99.236.50 NA NA 2015-05-20 21:05:31
## 9986: 180.76.6.56 NA NA 2015-05-20 21:05:56
## request
## 1: GET /presentations/logstash-monitorama-2013/images/kibana-search.png HTTP/1.1
## 2: GET /presentations/logstash-monitorama-2013/images/kibana-dashboard3.png HTTP/1.1
## 3: GET /presentations/logstash-monitorama-2013/plugin/highlight/highlight.js HTTP/1.1
## 4: GET /presentations/logstash-monitorama-2013/plugin/zoom-js/zoom.js HTTP/1.1
## 5: GET /presentations/logstash-monitorama-2013/plugin/notes/notes.js HTTP/1.1
## ---
## 9982: GET /presentations/logstash-puppetconf-2012/images/stats-negative-min.png HTTP/1.1
## 9983: GET /presentations/logstash-puppetconf-2012/images/logs.jpg HTTP/1.1
## 9984: GET /presentations/logstash-puppetconf-2012/images/apache-negative-duration.png HTTP/1.1
## 9985: GET /favicon.ico HTTP/1.1
## 9986: GET /robots.txt HTTP/1.1
## status respsize
## 1: 200 203023
## 2: 200 171717
## 3: 200 26185
## 4: 200 7697
## 5: 200 2892
## ---
## 9982: 200 46139
## 9983: 200 663847
## 9984: 200 97173
## 9985: 200 3638
## 9986: 200 NA
## referer
## 1: http://semicomplete.com/presentations/logstash-monitorama-2013/
## 2: http://semicomplete.com/presentations/logstash-monitorama-2013/
## 3: http://semicomplete.com/presentations/logstash-monitorama-2013/
## 4: http://semicomplete.com/presentations/logstash-monitorama-2013/
## 5: http://semicomplete.com/presentations/logstash-monitorama-2013/
## ---
## 9982: http://semicomplete.com/presentations/logstash-puppetconf-2012/
## 9983: http://semicomplete.com/presentations/logstash-puppetconf-2012/
## 9984: http://semicomplete.com/presentations/logstash-puppetconf-2012/
## 9985: NA
## 9986: NA
## agent
## 1: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
## 2: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
## 3: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
## 4: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
## 5: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
## ---
## 9982: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.107 Safari/537.36
## 9983: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.107 Safari/537.36
## 9984: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.107 Safari/537.36
## 9985: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.107 Safari/537.36
## 9986: Mozilla/5.0 (Windows NT 5.1; rv:6.0.2) Gecko/20100101 Firefox/6.0.2
## hour wday morning weekend
## 1: 10 1 TRUE TRUE
## 2: 10 1 TRUE TRUE
## 3: 10 1 TRUE TRUE
## 4: 10 1 TRUE TRUE
## 5: 10 1 TRUE TRUE
## ---
## 9982: 21 4 FALSE FALSE
## 9983: 21 4 FALSE FALSE
## 9984: 21 4 FALSE FALSE
## 9985: 21 4 FALSE FALSE
## 9986: 21 4 FALSE FALSE
## as
## 1: AS25159 PJSC MegaFon
## 2: AS25159 PJSC MegaFon
## 3: AS25159 PJSC MegaFon
## 4: AS25159 PJSC MegaFon
## 5: AS25159 PJSC MegaFon
## ---
## 9982: AS174 Cogent Communications
## 9983: AS174 Cogent Communications
## 9984: AS174 Cogent Communications
## 9985: AS174 Cogent Communications
## 9986: AS55967 Beijing Baidu Netcom Science and Technology Co., Ltd.
## city country countryCode
## 1: Moscow Russia RU
## 2: Moscow Russia RU
## 3: Moscow Russia RU
## 4: Moscow Russia RU
## 5: Moscow Russia RU
## ---
## 9982: Miami United States US
## 9983: Miami United States US
## 9984: Miami United States US
## 9985: Miami United States US
## 9986: Beijing China CN
## isp lat lon
## 1: PJSC MegaFon 55.7522 37.6156
## 2: PJSC MegaFon 55.7522 37.6156
## 3: PJSC MegaFon 55.7522 37.6156
## 4: PJSC MegaFon 55.7522 37.6156
## 5: PJSC MegaFon 55.7522 37.6156
## ---
## 9982: Cogent Communications 25.8530 -80.2348
## 9983: Cogent Communications 25.8530 -80.2348
## 9984: Cogent Communications 25.8530 -80.2348
## 9985: Cogent Communications 25.8530 -80.2348
## 9986: Beijing Baidu Netcom Science and Technology Co. 39.9289 116.3883
## org region regionName
## 1: PJSC MegaFon MOW Moscow
## 2: PJSC MegaFon MOW Moscow
## 3: PJSC MegaFon MOW Moscow
## 4: PJSC MegaFon MOW Moscow
## 5: PJSC MegaFon MOW Moscow
## ---
## 9982: Cogent Communications FL Florida
## 9983: Cogent Communications FL Florida
## 9984: Cogent Communications FL Florida
## 9985: Cogent Communications FL Florida
## 9986: Beijing Baidu Netcom Science and Technology Co. 11 Beijing
## timezone zip verb
## 1: Europe/Moscow 101194 GET
## 2: Europe/Moscow 101194 GET
## 3: Europe/Moscow 101194 GET
## 4: Europe/Moscow 101194 GET
## 5: Europe/Moscow 101194 GET
## ---
## 9982: America/New_York 33147 GET
## 9983: America/New_York 33147 GET
## 9984: America/New_York 33147 GET
## 9985: America/New_York 33147 GET
## 9986: Asia/Shanghai GET
## url
## 1: /presentations/logstash-monitorama-2013/images/kibana-search.png
## 2: /presentations/logstash-monitorama-2013/images/kibana-dashboard3.png
## 3: /presentations/logstash-monitorama-2013/plugin/highlight/highlight.js
## 4: /presentations/logstash-monitorama-2013/plugin/zoom-js/zoom.js
## 5: /presentations/logstash-monitorama-2013/plugin/notes/notes.js
## ---
## 9982: /presentations/logstash-puppetconf-2012/images/stats-negative-min.png
## 9983: /presentations/logstash-puppetconf-2012/images/logs.jpg
## 9984: /presentations/logstash-puppetconf-2012/images/apache-negative-duration.png
## 9985: /favicon.ico
## 9986: /robots.txt
## scheme
## 1: HTTP/1.1
## 2: HTTP/1.1
## 3: HTTP/1.1
## 4: HTTP/1.1
## 5: HTTP/1.1
## ---
## 9982: HTTP/1.1
## 9983: HTTP/1.1
## 9984: HTTP/1.1
## 9985: HTTP/1.1
## 9986: HTTP/1.1
## path
## 1: presentations/logstash-monitorama-2013/images/kibana-search.png
## 2: presentations/logstash-monitorama-2013/images/kibana-dashboard3.png
## 3: presentations/logstash-monitorama-2013/plugin/highlight/highlight.js
## 4: presentations/logstash-monitorama-2013/plugin/zoom-js/zoom.js
## 5: presentations/logstash-monitorama-2013/plugin/notes/notes.js
## ---
## 9982: presentations/logstash-puppetconf-2012/images/stats-negative-min.png
## 9983: presentations/logstash-puppetconf-2012/images/logs.jpg
## 9984: presentations/logstash-puppetconf-2012/images/apache-negative-duration.png
## 9985: favicon.ico
## 9986: robots.txt
## params order visit
## 1: NA 2 1
## 2: NA 14 1
## 3: NA 16 1
## 4: NA 5 1
## 5: NA 3 1
## ---
## 9982: NA 24 1847
## 9983: NA 23 1847
## 9984: NA 14 1847
## 9985: NA 15 1847
## 9986: NA 1 1852
# Ten most common paths among first requests (order == 1).
# head() returns at most 10 rows; indexing with [1:10] would pad the table
# with all-NA rows whenever fewer than 10 distinct paths exist.
knitr::kable(head(logs[order == 1, .N, by = path][order(-N)], 10L))
| path | N |
|---|---|
| favicon.ico | 221 |
| NA | 145 |
| presentations/logstash-scale11x/images/ahhh___rage_face_by_samusmmx-d5g5zap.png | 112 |
| images/web/2009/banner.png | 95 |
| reset.css | 91 |
| style2.css | 91 |
| images/googledotcom.png | 89 |
| robots.txt | 79 |
| images/jordan-80.png | 73 |
| projects/xdotool/xdotool.xhtml | 54 |
# Ten most common paths among the last request of each visit
# (the row with the highest `order` per visit).
# head() avoids the NA padding that [1:10] produces on short results.
knitr::kable(head(
  logs[, .SD[which.max(order)], by = visit][, .N, by = path][order(-N)],
  10L
))
| path | N |
|---|---|
| favicon.ico | 215 |
| NA | 149 |
| presentations/logstash-scale11x/images/ahhh___rage_face_by_samusmmx-d5g5zap.png | 111 |
| images/googledotcom.png | 94 |
| images/jordan-80.png | 90 |
| images/web/2009/banner.png | 90 |
| style2.css | 83 |
| robots.txt | 75 |
| reset.css | 63 |
| projects/xdotool/xdotool.xhtml | 63 |
# Ten most common paths among visits whose last request is also their first
# (highest `order` within the visit equals 1, i.e. single-request visits).
# head() avoids the NA padding that [1:10] produces on short results.
knitr::kable(head(
  logs[, .SD[which.max(order)], by = visit][order == 1, .N, by = path][order(-N)],
  10L
))
| path | N |
|---|---|
| presentations/logstash-scale11x/images/ahhh___rage_face_by_samusmmx-d5g5zap.png | 105 |
| images/googledotcom.png | 89 |
| favicon.ico | 67 |
| NA | 62 |
| robots.txt | 43 |
| presentations/logstash-scale11x/images/logstash.png | 18 |
| projects/xdotool/ | 15 |
| images/jordan-80.png | 11 |
| articles/dynamic-dns-with-dhcp/ | 11 |
| blog/geekery/ssl-latency.html | 10 |
# Up to ten most frequent (path, status) pairs with status >= 500.
# head() is safe on an empty result; `1:pmin(10, .N)` becomes c(1, 0) when no
# rows match and would print a spurious NA row.
head(logs[status >= 500, .N, by = .(path, status)][order(-N)], 10L)
## path status N
## 1: misc/Title.php.txt 500 2
## 2: projects/xdotool/ 500 1
# Time elapsed since the previous request of the same visit (NA for the first
# request of a visit). Units are pinned to seconds: `time - shift(time)` lets
# difftime choose units per group, but the column carries a single units
# attribute, so a group whose gaps resolve to minutes would be stored as
# if they were seconds.
logs[order(order),
     timesinceprevrequest := difftime(time, shift(time), units = "secs"),
     by = visit]
## ip identd uname time
## 1: 83.149.9.216 NA NA 2015-05-17 10:05:03
## 2: 83.149.9.216 NA NA 2015-05-17 10:05:43
## 3: 83.149.9.216 NA NA 2015-05-17 10:05:47
## 4: 83.149.9.216 NA NA 2015-05-17 10:05:12
## 5: 83.149.9.216 NA NA 2015-05-17 10:05:07
## ---
## 9982: 38.99.236.50 NA NA 2015-05-20 21:05:48
## 9983: 38.99.236.50 NA NA 2015-05-20 21:05:42
## 9984: 38.99.236.50 NA NA 2015-05-20 21:05:29
## 9985: 38.99.236.50 NA NA 2015-05-20 21:05:31
## 9986: 180.76.6.56 NA NA 2015-05-20 21:05:56
## request
## 1: GET /presentations/logstash-monitorama-2013/images/kibana-search.png HTTP/1.1
## 2: GET /presentations/logstash-monitorama-2013/images/kibana-dashboard3.png HTTP/1.1
## 3: GET /presentations/logstash-monitorama-2013/plugin/highlight/highlight.js HTTP/1.1
## 4: GET /presentations/logstash-monitorama-2013/plugin/zoom-js/zoom.js HTTP/1.1
## 5: GET /presentations/logstash-monitorama-2013/plugin/notes/notes.js HTTP/1.1
## ---
## 9982: GET /presentations/logstash-puppetconf-2012/images/stats-negative-min.png HTTP/1.1
## 9983: GET /presentations/logstash-puppetconf-2012/images/logs.jpg HTTP/1.1
## 9984: GET /presentations/logstash-puppetconf-2012/images/apache-negative-duration.png HTTP/1.1
## 9985: GET /favicon.ico HTTP/1.1
## 9986: GET /robots.txt HTTP/1.1
## status respsize
## 1: 200 203023
## 2: 200 171717
## 3: 200 26185
## 4: 200 7697
## 5: 200 2892
## ---
## 9982: 200 46139
## 9983: 200 663847
## 9984: 200 97173
## 9985: 200 3638
## 9986: 200 NA
## referer
## 1: http://semicomplete.com/presentations/logstash-monitorama-2013/
## 2: http://semicomplete.com/presentations/logstash-monitorama-2013/
## 3: http://semicomplete.com/presentations/logstash-monitorama-2013/
## 4: http://semicomplete.com/presentations/logstash-monitorama-2013/
## 5: http://semicomplete.com/presentations/logstash-monitorama-2013/
## ---
## 9982: http://semicomplete.com/presentations/logstash-puppetconf-2012/
## 9983: http://semicomplete.com/presentations/logstash-puppetconf-2012/
## 9984: http://semicomplete.com/presentations/logstash-puppetconf-2012/
## 9985: NA
## 9986: NA
## agent
## 1: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
## 2: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
## 3: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
## 4: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
## 5: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36
## ---
## 9982: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.107 Safari/537.36
## 9983: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.107 Safari/537.36
## 9984: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.107 Safari/537.36
## 9985: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.107 Safari/537.36
## 9986: Mozilla/5.0 (Windows NT 5.1; rv:6.0.2) Gecko/20100101 Firefox/6.0.2
## hour wday morning weekend
## 1: 10 1 TRUE TRUE
## 2: 10 1 TRUE TRUE
## 3: 10 1 TRUE TRUE
## 4: 10 1 TRUE TRUE
## 5: 10 1 TRUE TRUE
## ---
## 9982: 21 4 FALSE FALSE
## 9983: 21 4 FALSE FALSE
## 9984: 21 4 FALSE FALSE
## 9985: 21 4 FALSE FALSE
## 9986: 21 4 FALSE FALSE
## as
## 1: AS25159 PJSC MegaFon
## 2: AS25159 PJSC MegaFon
## 3: AS25159 PJSC MegaFon
## 4: AS25159 PJSC MegaFon
## 5: AS25159 PJSC MegaFon
## ---
## 9982: AS174 Cogent Communications
## 9983: AS174 Cogent Communications
## 9984: AS174 Cogent Communications
## 9985: AS174 Cogent Communications
## 9986: AS55967 Beijing Baidu Netcom Science and Technology Co., Ltd.
## city country countryCode
## 1: Moscow Russia RU
## 2: Moscow Russia RU
## 3: Moscow Russia RU
## 4: Moscow Russia RU
## 5: Moscow Russia RU
## ---
## 9982: Miami United States US
## 9983: Miami United States US
## 9984: Miami United States US
## 9985: Miami United States US
## 9986: Beijing China CN
## isp lat lon
## 1: PJSC MegaFon 55.7522 37.6156
## 2: PJSC MegaFon 55.7522 37.6156
## 3: PJSC MegaFon 55.7522 37.6156
## 4: PJSC MegaFon 55.7522 37.6156
## 5: PJSC MegaFon 55.7522 37.6156
## ---
## 9982: Cogent Communications 25.8530 -80.2348
## 9983: Cogent Communications 25.8530 -80.2348
## 9984: Cogent Communications 25.8530 -80.2348
## 9985: Cogent Communications 25.8530 -80.2348
## 9986: Beijing Baidu Netcom Science and Technology Co. 39.9289 116.3883
## org region regionName
## 1: PJSC MegaFon MOW Moscow
## 2: PJSC MegaFon MOW Moscow
## 3: PJSC MegaFon MOW Moscow
## 4: PJSC MegaFon MOW Moscow
## 5: PJSC MegaFon MOW Moscow
## ---
## 9982: Cogent Communications FL Florida
## 9983: Cogent Communications FL Florida
## 9984: Cogent Communications FL Florida
## 9985: Cogent Communications FL Florida
## 9986: Beijing Baidu Netcom Science and Technology Co. 11 Beijing
## timezone zip verb
## 1: Europe/Moscow 101194 GET
## 2: Europe/Moscow 101194 GET
## 3: Europe/Moscow 101194 GET
## 4: Europe/Moscow 101194 GET
## 5: Europe/Moscow 101194 GET
## ---
## 9982: America/New_York 33147 GET
## 9983: America/New_York 33147 GET
## 9984: America/New_York 33147 GET
## 9985: America/New_York 33147 GET
## 9986: Asia/Shanghai GET
## url
## 1: /presentations/logstash-monitorama-2013/images/kibana-search.png
## 2: /presentations/logstash-monitorama-2013/images/kibana-dashboard3.png
## 3: /presentations/logstash-monitorama-2013/plugin/highlight/highlight.js
## 4: /presentations/logstash-monitorama-2013/plugin/zoom-js/zoom.js
## 5: /presentations/logstash-monitorama-2013/plugin/notes/notes.js
## ---
## 9982: /presentations/logstash-puppetconf-2012/images/stats-negative-min.png
## 9983: /presentations/logstash-puppetconf-2012/images/logs.jpg
## 9984: /presentations/logstash-puppetconf-2012/images/apache-negative-duration.png
## 9985: /favicon.ico
## 9986: /robots.txt
## scheme
## 1: HTTP/1.1
## 2: HTTP/1.1
## 3: HTTP/1.1
## 4: HTTP/1.1
## 5: HTTP/1.1
## ---
## 9982: HTTP/1.1
## 9983: HTTP/1.1
## 9984: HTTP/1.1
## 9985: HTTP/1.1
## 9986: HTTP/1.1
## path
## 1: presentations/logstash-monitorama-2013/images/kibana-search.png
## 2: presentations/logstash-monitorama-2013/images/kibana-dashboard3.png
## 3: presentations/logstash-monitorama-2013/plugin/highlight/highlight.js
## 4: presentations/logstash-monitorama-2013/plugin/zoom-js/zoom.js
## 5: presentations/logstash-monitorama-2013/plugin/notes/notes.js
## ---
## 9982: presentations/logstash-puppetconf-2012/images/stats-negative-min.png
## 9983: presentations/logstash-puppetconf-2012/images/logs.jpg
## 9984: presentations/logstash-puppetconf-2012/images/apache-negative-duration.png
## 9985: favicon.ico
## 9986: robots.txt
## params order visit timesinceprevrequest
## 1: NA 2 1 3 secs
## 2: NA 14 1 9 secs
## 3: NA 16 1 1 secs
## 4: NA 5 1 1 secs
## 5: NA 3 1 4 secs
## ---
## 9982: NA 24 1847 6 secs
## 9983: NA 23 1847 1 secs
## 9984: NA 14 1847 2 secs
## 9985: NA 15 1847 2 secs
## 9986: NA 1 1852 NA secs
# Inspect the request sequence and inter-request gaps of the first visit.
logs[visit == 1, list(order, time, timesinceprevrequest)]
## order time timesinceprevrequest
## 1: 2 2015-05-17 10:05:03 3 secs
## 2: 14 2015-05-17 10:05:43 9 secs
## 3: 16 2015-05-17 10:05:47 1 secs
## 4: 5 2015-05-17 10:05:12 1 secs
## 5: 3 2015-05-17 10:05:07 4 secs
## 6: 13 2015-05-17 10:05:34 1 secs
## 7: 22 2015-05-17 10:05:57 1 secs
## 8: 17 2015-05-17 10:05:50 3 secs
## 9: 7 2015-05-17 10:05:24 5 secs
## 10: 18 2015-05-17 10:05:50 0 secs
## 11: 15 2015-05-17 10:05:46 3 secs
## 12: 4 2015-05-17 10:05:11 4 secs
## 13: 6 2015-05-17 10:05:19 7 secs
## 14: 11 2015-05-17 10:05:33 3 secs
## 15: 1 2015-05-17 10:05:00 NA secs
## 16: 9 2015-05-17 10:05:25 1 secs
## 17: 23 2015-05-17 10:05:59 2 secs
## 18: 10 2015-05-17 10:05:30 5 secs
## 19: 19 2015-05-17 10:05:53 3 secs
## 20: 8 2015-05-17 10:05:24 0 secs
## 21: 20 2015-05-17 10:05:54 1 secs
## 22: 12 2015-05-17 10:05:33 0 secs
## 23: 21 2015-05-17 10:05:56 2 secs
## order time timesinceprevrequest
library(ggmap)
##
## Attaching package: 'ggmap'
## The following object is masked from 'package:plotly':
##
## wind
## The following object is masked from 'package:magrittr':
##
## inset
# World map with one point per distinct (lon, lat) pair in the logs;
# point size is the number of requests recorded at that location.
ggplot(data = map_data("world")) +
  geom_polygon(
    mapping = aes(x = long, y = lat, group = group),
    fill = "grey90",
    colour = "white"
  ) +
  geom_point(
    data = logs[, .N, by = .(lon, lat)],
    mapping = aes(x = lon, y = lat, size = N),
    color = "#2165B6"
  ) +
  labs(x = "", y = "") +
  theme_minimal() +
  theme(legend.position = "top")
library(ggplot2)
# Add tile-heatmap layers to an existing ggplot object.
#
# ggplot: a ggplot object; data and aesthetics are supplied by the caller
#         (the call site in this file maps x, y and fill).
# size:   accepted for compatibility but not referenced in the body.
#
# Returns the plot with equal coordinates, white-bordered tiles, no axis or
# title labels, x breaks every 6 units from 0 to 24, and the default fill
# gradient.
# NOTE: this definition masks stats::heatmap for the rest of the session.
heatmap <- function(ggplot, size = 20) {
  tiles <- geom_tile(color = "white", size = 0.1)
  no_labels <- labs(x = NULL, y = NULL, title = NULL)
  x_axis <- scale_x_continuous(breaks = seq(0, 24, 6))

  ggplot +
    coord_equal() +
    tiles +
    no_labels +
    x_axis +
    scale_fill_gradient()
}
# Request counts per (country, hour), drawn as a heatmap for the countries
# whose name starts with "A".
ip_activity <- logs[, .N, by = .(country, hour)]
ga <- ggplot(
  ip_activity[country %like% "^A"],
  aes(x = hour, y = country, fill = N)
)
heatmap(ga)
library(DiagrammeR)
# Build a graph of transitions between frequently requested paths.
#
# Node table: one row per path with its request count N. `nodes` is the row
# index assigned BEFORE the N>50 filter, so the ids of the kept rows are not
# necessarily contiguous.
URLids<-logs[,.N,.(labels_col=path)][,nodes:=.I][N>50]
# Look up the node id for every log row; rows whose path was filtered out of
# URLids get nodes = NA and are dropped, leaving (visit, order, nodes).
activity<-URLids[logs, on=c(labels_col="path")][
!is.na(nodes),.(visit, order, nodes)]
# Self-join on visit: pairs every kept request with every other kept request
# of the same visit. Columns from the inner `activity` carry the `i.` prefix.
moves<-activity[activity, on=c("visit"), allow.cartesian=TRUE][
# Keep only pairs where the un-prefixed row is the request immediately
# preceding the `i.` row within the visit (consecutive `order` values).
order==i.order-1][
# Count transitions per (from, to) node pair; the count is stored as
# `tooltip`, and `penwidth` scales it so the largest count maps to 10.
,.(tooltip=.N),.(from=nodes,to=i.nodes)][,penwidth:=10*tooltip/max(tooltip)]
# setDF() converts both tables to plain data.frames by reference before they
# are passed to DiagrammeR as the node and edge tables.
# NOTE(review): assumes the installed DiagrammeR accepts these column names
# (nodes / from / to) in create_graph() -- confirm against the package version.
gr<-create_graph(setDF(URLids), setDF(moves))
render_graph(gr)
# Names of the (up to) ten countries with the most requests.
# head() never pads with NA; with [1:10] a short result would put NA into
# top10, and `country %in% top10` would then also select NA countries.
top10 <- head(logs[, .N, by = country][order(-N)], 10L)[["country"]]

# Requests per country in 5-minute buckets. The bucket column is named `xts`
# explicitly; previously that name came only from data.table auto-naming the
# unnamed `xts::align.time(...)` grouping expression.
tz_ts <- logs[
  country %in% top10,
  .N,
  by = .(country, xts = xts::align.time(time, n = 60 * 5))
]

# One panel per country, each with its own y scale.
ggplot(tz_ts, aes(x = xts, y = N, group = 1)) +
  geom_line() +
  geom_smooth() +
  facet_wrap(~country, scales = "free_y")
## `geom_smooth()` using method = 'loess'
library(xts)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Attaching package: 'xts'
## The following object is masked from 'package:data.table':
##
## last
## The following objects are masked from 'package:dplyr':
##
## first, last
# Requests per 60-second bucket in chronological order, wrapped in an xts
# object and plotted.
ts <- logs[order(time), .N, by = .(time = xts::align.time(time, n = 60))]
xts_df <- xts(x = ts[["N"]], order.by = ts[["time"]])
plot(xts_df)
# Install AnomalyDetection from GitHub only when it is not already available,
# so re-running the document does not hit the network and rebuild the package
# every time.
if (!requireNamespace("AnomalyDetection", quietly = TRUE)) {
  devtools::install_github("twitter/AnomalyDetection")
}
## Using GitHub PAT from envvar GITHUB_PAT
## Downloading GitHub repo twitter/AnomalyDetection@master
## from URL https://api.github.com/repos/twitter/AnomalyDetection/zipball/master
## Installing AnomalyDetection
## '/home/travis/R-bin/lib/R/bin/R' --no-site-file --no-environ --no-save \
## --no-restore --quiet CMD INSTALL \
## '/tmp/RtmpxvtyVV/devtoolsfabb1be44e42/twitter-AnomalyDetection-1f5deaa' \
## --library='/home/travis/R/Library' --install-tests
##
library(AnomalyDetection)
# Per-minute request counts as a two-column data.frame (time, N), built in
# chronological order to match the aggregation used for the xts plot; without
# order(time) the buckets appear in first-seen order of the raw log rows.
# The bucket width (n = 60, align.time's default) and column name are spelled
# out rather than left implicit.
AnomalyDetectionTs(
  setDF(logs[order(time), .N, by = .(time = align.time(time, n = 60))]),
  max_anoms = 0.05,
  direction = "both",
  plot = TRUE
)
## $anoms
## timestamp anoms
## 1 2015-05-17 10:06:00 74
## 2 2015-05-18 10:06:00 132
## 3 2015-05-19 14:06:00 134
## 4 2015-05-20 21:06:00 78
##
## $plot